Import Dataset and create working dataset:
#install.packages("plotly")
library(plotly)
package ‘plotly’ was built under R version 4.0.4
Loading required package: ggplot2
Learn more about the underlying theory at https://ggplot2-book.org/
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(reshape2)
package ‘reshape2’ was built under R version 4.0.3
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages --------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
v tibble 3.0.3 v dplyr 1.0.2
v tidyr 1.1.2 v stringr 1.4.0
v readr 1.3.1 v forcats 0.5.0
v purrr 0.3.4
-- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks plotly::filter(), stats::filter()
x dplyr::lag() masks stats::lag()
# Load the raw sales table. Text columns are kept as character on every R
# version (before R 4.0 read.csv() converted them to factors by default).
games <- read.csv("../../data/vgsales.csv", stringsAsFactors = FALSE)
# Reshape to long format: one row per record and sales column. The column
# holding the sales-column name is called "Region", its value "Copies Sold".
game_melt <- melt(
  data = games,
  id.vars = c("Rank", "Name", "Platform", "Year", "Genre", "Publisher"),
  measure.vars = c(
    "NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"
  ),
  variable.name = "Region",
  value.name = "Copies Sold"
)
# Going through as.character() guarantees the printed year is parsed rather
# than a factor code; entries that are not whole numbers become NA (R warns).
game_melt$Year <- as.integer(as.character(game_melt$Year))
NAs introduced by coercion
# Label the two columns produced by melt(): column 7 carries the name of the
# sales column and column 8 the corresponding value.
names(game_melt)[7:8] <- c("Region", "Copies Sold")
# Preview the first rows of the reshaped table.
head(game_melt)
Graph time baby!
# Copies sold per year in the NA_Sales region, stacked by genre.
graph1 <- game_melt %>%
  # Rows without a Year cannot be placed on the x axis; dropping them here
  # avoids ggplot2 removing them later with a "missing values" warning.
  filter(Region == "NA_Sales", !is.na(Year)) %>%
  group_by(Region, Year, Genre) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping
  # message that summarise() otherwise prints.
  summarise(`Copies Sold` = sum(`Copies Sold`), .groups = "drop") %>%
  ggplot() +
  aes(x = Year, y = `Copies Sold`, fill = Genre) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2)) +
  ylab("Number of Copies Sold (in millions)")
`summarise()` regrouping output by 'Region', 'Year' (override with `.groups` argument)
# Convert the static ggplot into an interactive plotly figure and display it.
ggplotly(graph1)
Removed 12 rows containing missing values (position_stack).
# Number of records per year, stacked by genre. Only the NA_Sales rows are
# used: after melt() every record appears once per Region, so a single Region
# yields one row per record.
graph2 <- game_melt %>%
  # Rows without a Year cannot be placed on the x axis; dropping them here
  # avoids ggplot2 removing them later with a "missing values" warning.
  filter(Region == "NA_Sales", !is.na(Year)) %>%
  # count() groups by the listed columns itself, so no group_by() is needed;
  # `name` labels the tally column directly instead of renaming `n` afterwards.
  count(Region, Year, Genre, name = "Number of Releases") %>%
  ggplot() +
  aes(x = Year, y = `Number of Releases`, fill = Genre) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2)) +
  ylab("Number of Games Released")
# Convert the static ggplot into an interactive plotly figure and display it.
ggplotly(graph2)
Removed 12 rows containing missing values (position_stack).
# Scratch check: report whether `y` is a list.
is.list(y)
[1] TRUE
testfxc1 <- function(reg, plat, gen, pub, years) {
  # Stacked bar chart of copies sold per year, split by genre.
  #
  # Input:
  #   reg   - regions (values of game_melt$Region), character vector or list.
  #           If "Global_Sales" is among them, only "Global_Sales" is used.
  #   plat  - platforms to keep, or "all" for every platform.
  #   gen   - genres to keep, or "all" for every genre.
  #   pub   - publishers to keep, or "all" for every publisher.
  #   years - two values: minimum year first, then maximum year (inclusive).
  # Output: the interactive figure returned by ggplotly().
  #
  # NOTE(review): a selection that matches no rows reaches ggplotly() with an
  # empty table -- confirm that callers handle that case.
  stopifnot(length(years) >= 2)

  # Expand the "all" sentinel to every value present in `column`; otherwise
  # flatten the selection so lists and vectors behave the same.
  resolve <- function(choice, column) {
    if ("all" %in% choice) unique(column) else unlist(choice)
  }

  # "Global_Sales" overrides any other region in the selection (presumably
  # because the global figure already contains the regional ones -- confirm).
  if ("Global_Sales" %in% reg) {
    regions <- "Global_Sales"
  } else {
    regions <- unlist(reg)
  }
  platforms <- resolve(plat, game_melt$Platform)
  genres <- resolve(gen, game_melt$Genre)
  publishers <- resolve(pub, game_melt$Publisher)
  # [[ extracts the bare value whether `years` is an atomic vector or a list.
  min_year <- years[[1]]
  max_year <- years[[2]]

  sales <- game_melt %>%
    # dplyr::filter() replaces subset(), which base R documents as a
    # convenience for interactive use; rows with NA Year are dropped either way.
    dplyr::filter(
      Region %in% regions,
      Platform %in% platforms,
      Genre %in% genres,
      Publisher %in% publishers,
      Year >= min_year,
      Year <= max_year
    ) %>%
    group_by(Year, Genre) %>%
    # .groups = "drop" avoids the regrouping message printed by summarise().
    summarise(`Copies Sold` = sum(`Copies Sold`), .groups = "drop")

  chart <- ggplot(sales) +
    # Year is treated as a category so that every year gets its own tick.
    aes(x = as.factor(Year), y = `Copies Sold`, fill = Genre) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2)) +
    # Without an explicit title the axis would be labelled "as.factor(Year)".
    xlab("Year") +
    ylab("Total Game Copies Sold (in millions)")

  ggplotly(chart)
}
testfxc2 <- function(reg, plat, gen, pub, years) {
  # Stacked bar chart of the number of releases per year, split by genre.
  #
  # Input:
  #   reg   - regions (values of game_melt$Region), character vector or list.
  #           If "Global_Sales" is among them, only "Global_Sales" is used.
  #   plat  - platforms to keep, or "all" for every platform.
  #   gen   - genres to keep, or "all" for every genre.
  #   pub   - publishers to keep, or "all" for every publisher.
  #   years - two values: minimum year first, then maximum year (inclusive).
  # Output: the interactive figure returned by ggplotly().
  #
  # NOTE(review): a selection that matches no rows reaches ggplotly() with an
  # empty table -- confirm that callers handle that case.
  stopifnot(length(years) >= 2)

  # Expand the "all" sentinel to every value present in `column`; otherwise
  # flatten the selection so lists and vectors behave the same.
  resolve <- function(choice, column) {
    if ("all" %in% choice) unique(column) else unlist(choice)
  }

  # "Global_Sales" overrides any other region in the selection.
  if ("Global_Sales" %in% reg) {
    regions <- "Global_Sales"
  } else {
    regions <- unlist(reg)
  }
  platforms <- resolve(plat, game_melt$Platform)
  genres <- resolve(gen, game_melt$Genre)
  publishers <- resolve(pub, game_melt$Publisher)
  # [[ extracts the bare value whether `years` is an atomic vector or a list.
  min_year <- years[[1]]
  max_year <- years[[2]]

  releases <- game_melt %>%
    # dplyr::filter() replaces subset(), which base R documents as a
    # convenience for interactive use; rows with NA Year are dropped either way.
    dplyr::filter(
      Region %in% regions,
      Platform %in% platforms,
      Genre %in% genres,
      Publisher %in% publishers,
      Year >= min_year,
      Year <= max_year
    ) %>%
    # After melt() a record has one row per region; collapse them so that
    # selecting several regions does not count the same release repeatedly.
    distinct(Rank, Name, Platform, Year, Genre, Publisher) %>%
    # count() groups by the listed columns itself and names the tally column.
    count(Year, Genre, name = "Number of Releases")

  chart <- ggplot(releases) +
    # Year is treated as a category so that every year gets its own tick.
    aes(x = as.factor(Year), y = `Number of Releases`, fill = Genre) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2)) +
    # Without an explicit title the axis would be labelled "as.factor(Year)".
    xlab("Year") +
    ylab("Number of Games Released")

  ggplotly(chart)
}
# Ad-hoc check of the filter expression: which JP_Sales rows exist for the
# N64 platform between 1980 and 1990, for any genre and any publisher?
filter_region <- list("JP_Sales")
filter_plat <- list("N64")
filter_gen <- unique(game_melt$Genre)
filter_pub <- unique(game_melt$Publisher)
min_year <- 1980
max_year <- 1990
test <- subset(
  game_melt,
  Region %in% filter_region &
    Platform %in% filter_plat &
    Genre %in% filter_gen &
    Publisher %in% filter_pub &
    Year >= min_year &
    Year <= max_year
)
# Rows and columns of the filtered table.
dim(test)
[1] 0 8
---
title: "R Notebook"
output: html_notebook
---
Import Dataset and create working dataset:
```{r}
#install.packages("plotly")
library(plotly)
library(reshape2)
library(tidyverse)
# Load the raw sales table. Text columns are kept as character on every R
# version (before R 4.0 read.csv() converted them to factors by default).
games <- read.csv("../../data/vgsales.csv", stringsAsFactors = FALSE)
# Reshape to long format: one row per record and sales column. Naming the new
# columns inside melt() avoids renaming them afterwards by position.
game_melt <- melt(
  data = games,
  id.vars = c("Rank", "Name", "Platform", "Year", "Genre", "Publisher"),
  measure.vars = c(
    "NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"
  ),
  variable.name = "Region",
  value.name = "Copies Sold"
)
# Going through as.character() guarantees the printed year is parsed rather
# than a factor code; entries that are not whole numbers become NA (R warns).
game_melt$Year <- as.integer(as.character(game_melt$Year))
# Preview the first rows of the reshaped table.
head(game_melt)
```
Graph time baby!
```{r}
# Copies sold per year in the NA_Sales region, stacked by genre.
graph1 <- game_melt %>%
  # Rows without a Year cannot be placed on the x axis; dropping them here
  # avoids ggplot2 removing them later with a "missing values" warning.
  filter(Region == "NA_Sales", !is.na(Year)) %>%
  group_by(Region, Year, Genre) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping
  # message that summarise() otherwise prints.
  summarise(`Copies Sold` = sum(`Copies Sold`), .groups = "drop") %>%
  ggplot() +
  aes(x = Year, y = `Copies Sold`, fill = Genre) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2)) +
  ylab("Number of Copies Sold (in millions)")

# Convert the static ggplot into an interactive plotly figure and display it.
ggplotly(graph1)
```

```{r}
# Number of records per year, stacked by genre. Only the NA_Sales rows are
# used: after melt() every record appears once per Region, so a single Region
# yields one row per record.
graph2 <- game_melt %>%
  # Rows without a Year cannot be placed on the x axis; dropping them here
  # avoids ggplot2 removing them later with a "missing values" warning.
  filter(Region == "NA_Sales", !is.na(Year)) %>%
  # count() groups by the listed columns itself, so no group_by() is needed;
  # `name` labels the tally column directly instead of renaming `n` afterwards.
  count(Region, Year, Genre, name = "Number of Releases") %>%
  ggplot() +
  aes(x = Year, y = `Number of Releases`, fill = Genre) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2)) +
  ylab("Number of Games Released")

# Convert the static ggplot into an interactive plotly figure and display it.
ggplotly(graph2)
```
```{r}
# Number of distinct genres, platforms and publishers seen in each year.
# NOTE(review): the previous header mentioned "games > 100 copies sold", but no
# sales threshold is applied anywhere in this pipeline -- confirm whether one
# was intended.
graph3 <- game_melt %>%
  # One Region is enough: every record appears once per Region after melt().
  # Rows without a Year are dropped because they cannot be placed on the axis.
  filter(Region == "JP_Sales", !is.na(Year)) %>%
  # Stack the three descriptive columns into (Year, Category, value) rows.
  melt(
    id.vars = "Year",
    measure.vars = c("Genre", "Platform", "Publisher"),
    variable.name = "Category"
  ) %>%
  # Keep each (Year, Category, value) combination once so that count() below
  # tallies distinct values rather than rows.
  distinct() %>%
  count(Year, Category, name = "Counts of Genres, Publishers and Platforms") %>%
  ggplot() +
  aes(
    x = Year,
    y = `Counts of Genres, Publishers and Platforms`,
    fill = Category
  ) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2)) +
  ylab("Counts of Genres, Publishers and Platforms")

# Convert the static ggplot into an interactive plotly figure and display it.
ggplotly(graph3)
# Scratch check: wrapping a single number in list() yields a list.
x <- 1
y <- list(x)
is.list(y)
```

```{r}
testfxc1 <- function(reg, plat, gen, pub, years) {
  # Stacked bar chart of copies sold per year, split by genre.
  #
  # Input:
  #   reg   - regions (values of game_melt$Region), character vector or list.
  #           If "Global_Sales" is among them, only "Global_Sales" is used.
  #   plat  - platforms to keep, or "all" for every platform.
  #   gen   - genres to keep, or "all" for every genre.
  #   pub   - publishers to keep, or "all" for every publisher.
  #   years - two values: minimum year first, then maximum year (inclusive).
  # Output: the interactive figure returned by ggplotly().
  #
  # NOTE(review): a selection that matches no rows reaches ggplotly() with an
  # empty table -- confirm that callers handle that case.
  stopifnot(length(years) >= 2)

  # Expand the "all" sentinel to every value present in `column`; otherwise
  # flatten the selection so lists and vectors behave the same.
  resolve <- function(choice, column) {
    if ("all" %in% choice) unique(column) else unlist(choice)
  }

  # "Global_Sales" overrides any other region in the selection (presumably
  # because the global figure already contains the regional ones -- confirm).
  if ("Global_Sales" %in% reg) {
    regions <- "Global_Sales"
  } else {
    regions <- unlist(reg)
  }
  platforms <- resolve(plat, game_melt$Platform)
  genres <- resolve(gen, game_melt$Genre)
  publishers <- resolve(pub, game_melt$Publisher)
  # [[ extracts the bare value whether `years` is an atomic vector or a list.
  min_year <- years[[1]]
  max_year <- years[[2]]

  sales <- game_melt %>%
    # dplyr::filter() replaces subset(), which base R documents as a
    # convenience for interactive use; rows with NA Year are dropped either way.
    dplyr::filter(
      Region %in% regions,
      Platform %in% platforms,
      Genre %in% genres,
      Publisher %in% publishers,
      Year >= min_year,
      Year <= max_year
    ) %>%
    group_by(Year, Genre) %>%
    # .groups = "drop" avoids the regrouping message printed by summarise().
    summarise(`Copies Sold` = sum(`Copies Sold`), .groups = "drop")

  chart <- ggplot(sales) +
    # Year is treated as a category so that every year gets its own tick.
    aes(x = as.factor(Year), y = `Copies Sold`, fill = Genre) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2)) +
    # Without an explicit title the axis would be labelled "as.factor(Year)".
    xlab("Year") +
    ylab("Total Game Copies Sold (in millions)")

  ggplotly(chart)
}

testfxc2 <- function(reg, plat, gen, pub, years) {
  # Stacked bar chart of the number of releases per year, split by genre.
  #
  # Input:
  #   reg   - regions (values of game_melt$Region), character vector or list.
  #           If "Global_Sales" is among them, only "Global_Sales" is used.
  #   plat  - platforms to keep, or "all" for every platform.
  #   gen   - genres to keep, or "all" for every genre.
  #   pub   - publishers to keep, or "all" for every publisher.
  #   years - two values: minimum year first, then maximum year (inclusive).
  # Output: the interactive figure returned by ggplotly().
  #
  # NOTE(review): a selection that matches no rows reaches ggplotly() with an
  # empty table -- confirm that callers handle that case.
  stopifnot(length(years) >= 2)

  # Expand the "all" sentinel to every value present in `column`; otherwise
  # flatten the selection so lists and vectors behave the same.
  resolve <- function(choice, column) {
    if ("all" %in% choice) unique(column) else unlist(choice)
  }

  # "Global_Sales" overrides any other region in the selection.
  if ("Global_Sales" %in% reg) {
    regions <- "Global_Sales"
  } else {
    regions <- unlist(reg)
  }
  platforms <- resolve(plat, game_melt$Platform)
  genres <- resolve(gen, game_melt$Genre)
  publishers <- resolve(pub, game_melt$Publisher)
  # [[ extracts the bare value whether `years` is an atomic vector or a list.
  min_year <- years[[1]]
  max_year <- years[[2]]

  releases <- game_melt %>%
    # dplyr::filter() replaces subset(), which base R documents as a
    # convenience for interactive use; rows with NA Year are dropped either way.
    dplyr::filter(
      Region %in% regions,
      Platform %in% platforms,
      Genre %in% genres,
      Publisher %in% publishers,
      Year >= min_year,
      Year <= max_year
    ) %>%
    # After melt() a record has one row per region; collapse them so that
    # selecting several regions does not count the same release repeatedly.
    distinct(Rank, Name, Platform, Year, Genre, Publisher) %>%
    # count() groups by the listed columns itself and names the tally column.
    count(Year, Genre, name = "Number of Releases")

  chart <- ggplot(releases) +
    # Year is treated as a category so that every year gets its own tick.
    aes(x = as.factor(Year), y = `Number of Releases`, fill = Genre) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2)) +
    # Without an explicit title the axis would be labelled "as.factor(Year)".
    xlab("Year") +
    ylab("Number of Games Released")

  ggplotly(chart)
}
```

```{r}
# Ad-hoc check of the filter expression: which JP_Sales rows exist for the
# N64 platform between 1980 and 1990, for any genre and any publisher?
filter_region <- list("JP_Sales")
filter_plat <- list("N64")
filter_gen <- unique(game_melt$Genre)
filter_pub <- unique(game_melt$Publisher)
min_year <- 1980
max_year <- 1990
test <- subset(
  game_melt,
  Region %in% filter_region &
    Platform %in% filter_plat &
    Genre %in% filter_gen &
    Publisher %in% filter_pub &
    Year >= min_year &
    Year <= max_year
)
# Rows and columns of the filtered table.
dim(test)
```
```{r}
# Initialize Top Genre Card (Tab3): genre(s) with the highest copies sold.
x <- game_melt %>%
  # Rank on the Global_Sales rows only: summing every Region would add the
  # global figure on top of the regional ones and inflate each total.
  # NOTE(review): assumes Global_Sales is the total across regions -- confirm.
  filter(Region == "Global_Sales") %>%
  group_by(Genre) %>%
  summarise(`Copies Sold` = sum(`Copies Sold`)) %>%
  # Comparing against max() keeps every genre tied for first place.
  filter(`Copies Sold` == max(`Copies Sold`))
# Show only the first column (Genre) of the result.
x[1]
```




